County Figures
theme_c <- function(...){
# font <- "Helvetica" #assign font family up front
theme_bw() %+replace% #replace elements we want to change
theme(
#text elements
plot.title = element_text( #title
size = 16, #set font size
face = 'bold', #bold typeface
hjust = .5,
vjust = 3),
plot.subtitle = element_text( #subtitle
size = 12,
hjust = .5,
face = 'italic',
vjust = 3), #font size
axis.title = element_text( #axis titles
size = 12), #font size
axis.text = element_text( #axis text
size = 9),
legend.text = element_text(size = 12),
# t, r, b, l
plot.margin = unit(c(1,.5,.5,.5), "cm"),
legend.position = "right",
strip.text.x = element_text(size = 11, color="white"),
strip.text.y = element_text(size = 11, color="white",angle=270),
strip.background = element_rect(fill = "#3E3D3D")
) %+replace%
theme(...)
}
pal <- c("red", "#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c("Covidestim",
'$Pr(S_1|untested)$ Centered at Survey Value',
'$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Survey Value"
)targets_dir <- here("_targets")
county <- tar_read(ma_v1,
store =targets_dir) %>%
mutate(version = "v1") %>%
bind_rows(
tar_read(ma_v2,
store =targets_dir)
) %>%
bind_rows(
tar_read(ma_v3,
store =targets_dir)
) %>%
bind_rows(
tar_read(ma_v4,
store =targets_dir)
)%>%
bind_rows(
tar_read(ma_v5,
store =targets_dir)
)%>%
bind_rows(
tar_read(ma_v6,
store =targets_dir)
)%>%
bind_rows(
tar_read(ma_v7,
store =targets_dir)
)
waste <- tar_read(waste, store =targets_dir)
county_names <- read_tsv(here("data/demographic/county_fips.tsv")) %>%
rename_with(.cols = everything(), .fn =tolower) %>%
bind_rows()
covidestim <- tar_read(covidestim_biweekly_county,
store =targets_dir) %>%
select(-c(date)) %>%
distinct()
dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))
FILT_VERSIONS <- c("v3", "v5", "v6", "v7")
if(filter_versions) {
county <- county %>%
filter(version %in% FILT_VERSIONS)
}convert_labels <- function(labels, to_newlines=TRUE) {
if(to_newlines) {
gsub("$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\overset{Priors\\,Do\\,Not\\,Vary}{\\,by\\,State\\,and\\,Date}$",
as.character(labels),
fixed=TRUE) %>%
gsub("$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
"$\\overset{Pr(S_1|untested)\\,and\\,beta}{Centered\\,at\\,Survey\\,Values}$",
.,
fixed=TRUE)
}
else {
gsub(
"$\\overset{Priors\\,Do\\,Not\\,Vary}{\\,by\\,State\\,and\\,Date}$",
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
as.character(labels),
fixed=TRUE) %>%
gsub("$\\overset{Pr(S_1|untested)\\,and\\,beta}{Centered\\,at\\,Survey\\,Values}$",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
.,
fixed=TRUE)
}
}wjoined <- county %>%
# set Nantucket, Duke fips to Nantucket since we have wastewater data
# for Nantucket
mutate(fips = ifelse(grepl("25019", fips), "25019", fips)) %>%
inner_join(waste) %>%
left_join(covidestim, relationship = "many-to-many") %>%
left_join(dates, relationship = "many-to-many") %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate = max(date)) %>%
ungroup()
counties_with_waste <- wjoined %>%
group_by(fips) %>%
mutate(obs_w_notna = sum(!is.na(mean_conc))) %>%
filter(obs_w_notna != 0) %>%
pull(fips) %>%
unique()
versions <- tibble(
version = c("v1", "v2", "v3", "v4", "v5", "v6", "v7"),
vlabel = factor(
c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Survey Value",
"$Pr(S_1|untested)$ Centered at Survey Value",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
"$\\beta$ Centered at Survey Value (Spline Smoothed)",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
"$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
),
levels = c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Survey Value",
"$Pr(S_1|untested)$ Centered at Survey Value",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
"$\\beta$ Centered at Survey Value (Spline Smoothed)",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
"$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
) )
)
wjoined <- wjoined %>%
left_join(versions)
if(filter_versions) {
county <- county %>%
left_join(versions) %>%
mutate(vlabel=case_when(
version == "v7" ~ "$\\overset{Priors\\,Do\\,Not\\,Vary}{\\,by\\,State\\,and\\,Date}$",
version == "v5" ~ "$\\beta$ Centered at Survey Value",
version =="v6" ~ "$\\overset{Pr(S_1|untested)\\,and\\,beta}{Centered\\,at\\,Survey\\,Values}$",
version == "v3" ~ "$Pr(S_1|untested)$ Centered at Survey Value",
)) %>%
mutate(vlabel =factor(vlabel,levels= c(
"$\\overset{Priors\\,Do\\,Not\\,Vary}{\\,by\\,State\\,and\\,Date}$",
"$Pr(S_1|untested)$ Centered at Survey Value",
"$\\beta$ Centered at Survey Value",
"$\\overset{Pr(S_1|untested)\\,and\\,beta}{Centered\\,at\\,Survey\\,Values}$"))) %>%
left_join(county_names) %>%
mutate(name = ifelse(fips =="25007,25019", "Nantucket and Dukes", name)) %>%
left_join(dates, relationship = "many-to-many") %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate = max(date)) %>%
ungroup()
county$vlabel %>% unique()
wjoined <- wjoined %>%
mutate(vlabel=case_when(
version == "v7" ~ "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
version == "v5" ~ "$\\beta$ Centered at Survey Value",
version =="v6" ~ "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
version == "v3" ~ "$Pr(S_1|untested)$ Centered at Survey Value"
))
}Correlations
Normalization Strategy 1: Standardization
w_data <- wjoined %>%
select(-date) %>%
distinct() %>%
filter(fips %in% counties_with_waste) %>%
filter(!is.na(mean_conc)) %>%
group_by(fips, version) %>%
mutate(
mean_conc_std = (mean_conc - mean(mean_conc))/
sd(mean_conc),
exp_cases_median_std = (exp_cases_median - mean(exp_cases_median))/
sd(exp_cases_median),
infections_std = (infections - mean(infections))/
sd(infections),
exp_cases_lb_std = (exp_cases_lb - mean(exp_cases_lb))/
sd(exp_cases_lb),
exp_cases_ub_std = (exp_cases_ub - mean(exp_cases_ub))/
sd(exp_cases_ub),
positive_std = (positive - mean(positive))
/sd(positive)) %>%
ungroup()PB Counts and Wastewater
# correlations between PB counts and wastewater concentrations
correlations <- w_data %>%
group_by(version) %>%
summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
vlabel =unique(vlabel)) %>%
mutate(x=3.5,
y=.1)
w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y,
label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c(axis.title = element_text(size=15),
strip.text=element_text(size=15, color='white', margin=margin(2,0,2,0))) +
labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Infections\nand Mean Wastewater Concentration")plt_wastewater_pb <- w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c(axis.title = element_text(size=15),
strip.text=element_text(size=15, color='white',margin=margin(2,0,2,0))) +
# theme(strip.text.x = element_text(size=12)) +
labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "Correlations with Wastewater Concentrations")PB Counts and Covidestim
# correlations between PB counts and Covidestim estimates
covidestim_correlations <- w_data %>%
filter(!is.na(infections_std)) %>%
group_by(vlabel) %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(exp_cases_median_std,infections_std,
use = "complete.obs")) %>%
mutate(x=3.5,
y=.1)
plt_covidestim_pb <- w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c(axis.title = element_text(size=15),
strip.text=element_text(size=15, color='white',margin=margin(2,0,2,0))) +
# theme(strip.text.x = element_text(size=12)) +
labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
y = "Covidestim Estimates\n(Standardized by County)",
title = "Correlations with Covidestim Infections")
plt_covidestim_pbCombined Figure
cor_covid <- covidestim_correlations %>%
select(vlabel,
`Correlation with Covidestim Estimates`=Correlation)
cor_waste <- correlations %>%
select(vlabel,
`Correlation with Wastewater Concentrations`=Correlation)
cor_waste %>%
left_join(cor_covid) %>%
mutate(vlabel = gsub(
"$\\overset{Priors\\,Do\\,Not\\,Vary}{\\,by\\,State\\,and\\,Date}$",
"Priors Do Not Vary by State and Date",
vlabel, fixed=TRUE),vlabel=gsub(" at","\nat", vlabel)) %>%
arrange(desc( `Correlation with Wastewater Concentrations`)) %>%
rename(Implementation = vlabel) %>%
kbl() %>%
kable_styling(full_width = TRUE)| Implementation | Correlation with Wastewater Concentrations | Correlation with Covidestim Estimates |
|---|---|---|
| \(Priors\,Do\,Not\,Vary\,by\,State\,and\,Date\) | 0.8975680 | 0.9862114 |
| \(Pr(S_1|untested)\) Centered at Survey Value | 0.8959082 | 0.9853500 |
| \(\beta\) Centered at Survey Value | 0.8923456 | 0.9825170 |
| \(Pr(S_1|untested)\) and \(\beta\) Centered at Survey Values | 0.8880780 | 0.9804817 |
Covidestim and Wastewater
covidestim_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(mean_conc_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1)
w_data %>%
filter(version=="v7") %>%
ggplot(aes(x=infections_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
theme_c() +
labs(x = "Covidestim Estimates\n(Standardized by County)",
y = "Wastewater Concentrations\n(Standardized by County)",
title = "Correlations Between Wastewater Concentrations\nand Covidestim Estimates")Compare to Wastewater Over Time
library(scales)
pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c(#"$observed$",
'$Pr(S_1|untested)$ Centered at Survey Value',
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Survey Value"
)
w_data <- w_data %>%
group_by(name) %>%
mutate(adjust = 1/(max(mean_conc))*(exp_cases_median[which.max(mean_conc)])) %>%
ungroup()
waste_df <- w_data %>%
select(biweek, mean_conc, name, adjust, fips) %>%
distinct() %>%
left_join(dates,relationship = "many-to-many" ) %>%
group_by(name, biweek) %>%
# center date within 2-week interval
summarize(date = min(date) + days(7),
mean_conc =unique(mean_conc),
adjust=unique(adjust),
fips = unique(fips)) %>%
mutate(name = gsub("County, MA", "", name))
pltlist <- w_data %>%
mutate(name = gsub("County, MA", "", name)) %>%
left_join(dates,relationship = "many-to-many" ) %>%
# filter(version %in% c("v7","v3")) %>%
filter(version=="v7") %>%
filter(biweek>=6) %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate=max(date)) %>%
ungroup() %>%
group_by(name) %>%
group_split() %>%
map(~ {
county_fips <- .x$fips %>% unique()
adj <- .x$adjust %>% unique()
.x %>%
# filter(fips == county_fips & date >= begin_date & date <= end_date) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5) +
geom_line(data = waste_df[which(waste_df$fips == county_fips),],
aes(x = date, y =mean_conc*adj,
color = 'Wastewater Effective Concentrations'),
# color = "#DB4048",
size = 1.1) +
geom_linerange(aes(xmin = mindate,
xmax = maxdate,
y = infections,
color = 'Covidestim Infection Estimates')) +
geom_point(data = waste_df[which(waste_df$fips == county_fips),],
aes(x = date, y =mean_conc*adj ),
color = "#DB4048",
alpha = .5,
size = 1.2) +
scale_y_continuous(sec.axis=sec_axis(~.x/adj,
name = 'Mean Wastewater Effective Concentration',
labels=comma),
labels=comma
) +
# facet_grid(name~vlabel,
# labeller=labeller(vlabel =as_labeller(
# TeX, default=label_parsed))) +
facet_wrap(~name, ncol=3) +
scale_fill_manual(values = pal, labels = c('','','',''),
name = "Probabilistic Bias Intervals") +
theme_c(legend.position="none") +
labs(y = "Value (Standardized by County)",
title = "",
x= "") +
scale_x_date(date_labels = "%b %Y") +
scale_color_manual(
name = '',
values = c(
'Covidestim Infection Estimates' = 'darkblue',
'Wastewater Effective Concentrations' = '#DB4048')) +
guides(color = guide_legend(override.aes = list(size = 12,
linewidth=3)),
fill = guide_legend(override.aes =list(size = 6)))
})
legend <- cowplot::get_legend(
pltlist[[1]] + theme(legend.position="top")
)
grid <- cowplot::plot_grid(plotlist=pltlist, ncol =3)
cowplot::plot_grid(legend, grid, nrow=2, rel_heights=c(.05,.95))Normalization Strategy 2: By Values at Biweek Where Wastewater Concentration is Maximized
w_data <- wjoined %>%
select(-date) %>%
distinct() %>%
filter(fips %in% counties_with_waste) %>%
filter(!is.na(mean_conc)) %>%
group_by(fips, version) %>%
mutate(mean_conc_std = mean_conc/ max(mean_conc),
max_conc = max(mean_conc),
exp_cases_median_std = exp_cases_median / exp_cases_median[which.max(mean_conc)],
exp_cases_lb_std = exp_cases_lb /exp_cases_median[which.max(mean_conc)],
exp_cases_ub_std = exp_cases_ub / exp_cases_median[which.max(mean_conc)],
infections_std = infections/ infections[which.max(mean_conc)],
positive_std = positive/max(positive)) %>%
ungroup()PB Counts and Wastewater
# correlations between PB counts and wastewater concentrations
correlations <- w_data %>%
group_by(version) %>%
summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
vlabel =unique(vlabel)) %>%
mutate(x=1.8,
y=.1)
w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Infections\nand Mean Wastewater Concentration")
plt_wastewater_pb <- w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Infections\n(Standardized by County by County)",
y = "Mean Wastewater Concentration\n(Standardized by County by County)",
title = "")PB Counts and Covidestim
# correlations between PB counts and Covidestim estimates
covidestim_correlations <- w_data %>%
filter(!is.na(infections_std)) %>%
group_by(vlabel) %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(exp_cases_median_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1)
w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Infections\n(Standardized by County by County)",
y = "Covidestim Estimates\n(Standardized by County by County)",
title = "Correlations Between Probabilistic Bias Infections\nand Covidestim Estimates")
plt_covidestim_pb <- w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Infections\n(Standardized by County by County)",
y = "Covidestim Estimates\n(Standardized by County by County)",
title = "")Combined Figure
cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2)
cor_covid <- covidestim_correlations %>%
select(vlabel,
`Correlation with Covidestim Estimates`=Correlation)
cor_waste <- correlations %>%
select(vlabel,
`Correlation with Wastewater Concentrations`=Correlation)
cor_waste %>%
left_join(cor_covid) %>%
mutate(vlabel = gsub(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"Priors Do Not Vary by County and Date",
vlabel, fixed=TRUE),vlabel=gsub(" at","\nat", vlabel)) %>%
arrange(desc( `Correlation with Wastewater Concentrations`)) %>%
rename(Implementation = vlabel) %>%
kbl() %>%
kable_styling(full_width = TRUE)Covidestim and Wastewater
covidestim_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(mean_conc_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1)
w_data %>%
filter(version=="v7") %>%
ggplot(aes(x=infections_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
theme_c() +
labs(x = "Covidestim Estimates\n(Standardized by County by County)",
y = "Wastewater Concentrations\n(Standardized by County by County)",
title = "Correlations Between Probabilistic Bias Infections\nand Covidestim Estimates")
pb_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(positive_std,mean_conc_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1) Compare to Wastewater Over Time
library(scales)
pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c(#"$observed$",
'$Pr(S_1|untested)$ Centered at Survey Value',
'$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Survey Value"
)
waste_df <- w_data %>%
select(biweek, mean_conc_std, name) %>%
distinct() %>%
left_join(dates,relationship = "many-to-many" ) %>%
group_by(name, biweek) %>%
# center date within 2-week interval
summarize(date = min(date) + days(7),
mean_conc_std=unique(mean_conc_std)) %>%
mutate(name = gsub("County, MA", "", name))
w_data%>%
mutate(name = gsub("County, MA", "", name)) %>%
left_join(dates,relationship = "many-to-many" ) %>%
filter(version %in% c("v7","v3")) %>%
filter(biweek>=6) %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate=max(date)) %>%
ungroup() %>%
# filter(fips == county_fips & date >= begin_date & date <= end_date) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb_std,
ymax = exp_cases_ub_std,
fill = vlabel),
alpha = .5) +
geom_line(data = waste_df,
aes(x = date, y =mean_conc_std,
color = 'Wastewater Effective Concentrations'),
# color = "#DB4048",
size = 1.1) +
geom_linerange(aes(xmin = mindate,
xmax = maxdate,
y = infections_std,
color = 'Covidestim Infection Estimates')) +
geom_point(data = waste_df,
aes(x = date, y =mean_conc_std ),
color = "#DB4048",
alpha = .5,
size = 1.2) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed))) +
scale_fill_manual(values = pal, labels = c('','','',''),
name = "Probabilistic Bias Intervals") +
theme_bw() +
theme_c() +
labs(y = "Value (Standardized by County by County)",
title = "",
x= "") +
scale_x_date(date_labels = "%b %Y") +
scale_color_manual(
name = '',
values = c(
'Covidestim Infection Estimates' = 'darkblue',
'Wastewater Effective Concentrations' = '#DB4048')) +
guides(color = guide_legend(override.aes = list(size = 12,
linewidth=3)),
fill = guide_legend(override.aes =list(size = 6)))Normalization Strategy 3: By Maximum (Separately)
For each county, normalize the wastewater concentration by the maximum concentration for that county and the Probabilistic Bias Infections by the maximum median estimated counts for that county (2.5th percentile and 97.5th percentile also standardized by the maximum median estimated counts for that county).
PB Counts and Wastewater
# correlations between PB counts and wastewater concentrations
correlations <- w_data %>%
group_by(version) %>%
summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
vlabel =unique(vlabel)) %>%
mutate(x=1.8,
y=.1)
w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Infections\nand Mean Wastewater Concentration")
plt_wastewater_pb <- w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
y = "Mean Wastewater Concentration\n(Standardized by County)",
title = "")PB Counts and Covidestim
# correlations between PB counts and Covidestim estimates
covidestim_correlations <- w_data %>%
filter(!is.na(infections_std)) %>%
group_by(vlabel) %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(exp_cases_median_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1)
w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
y = "Covidestim Estimates\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Infections\nand Covidestim Estimates")
plt_covidestim_pb <- w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
y = "Covidestim Estimates\n(Standardized by County)",
title = "")Covidestim and Wastewater
covidestim_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(mean_conc_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1)
w_data %>%
filter(version=="v7") %>%
ggplot(aes(x=infections_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
theme_c() +
labs(x = "Covidestim Estimates\n(Standardized by County)",
y = "Wastewater Concentrations\n(Standardized by County)",
title = "Correlations Between Probabilistic Bias Infections\nand Covidestim Estimates")
pb_correlations <- w_data %>%
filter(version=="v7") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(positive_std,mean_conc_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1) Simulation Intervals Over Time
county %>%
filter(version=="v7") %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub),
alpha = .5,
show.legend=FALSE,
fill="#596281") +
geom_linerange(aes(xmin = mindate,
xmax=maxdate,
y = positive,
color = 'obs'),
size=.5) +
facet_wrap(~name, scales="free_y", ncol=3) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
axis.text.x=element_text(size=7),
legend.position = "top",
) +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "Counties in Massachusetts") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))county %>%
filter(fips %in% sample(unique(.data$fips),2)) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub),
alpha = .5,
show.legend=FALSE,
fill="#596281") +
geom_linerange(aes(xmin = mindate,
xmax=maxdate,
y = positive,
color = 'obs'),
size=.5) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed)),
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
) +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "Counties in Massachusetts") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))Ratio of Estimated Infections to Observed
county %>%
filter(version=="v7") %>%
#filter(grepl("Nan",name)) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb/positive,
ymax = exp_cases_ub/positive),
alpha = .5,
show.legend=FALSE,
fill="#596281") +
# geom_linerange(aes(xmin = mindate,
# xmax=maxdate,
# y = positive,
# color = 'obs')) +
facet_wrap(~name,ncol=3) +
# facet_grid(name~vlabel,
# labeller=labeller(vlabel =as_labeller(
# TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top") +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Ratio of Estimated to Observed Over Time",
subtitle = "Counties in Massachusetts") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))Trends in Total Tests and Positive Tests
Informs our understanding of the trends in the ratio of estimated to observed infections.
Positive Tests and Test Positivity
############################################
# POSITIVE TESTS AND TEST POSITIVITY
############################################
pltlist <- county %>%filter(version=="v7") %>%
mutate(testpos = positive/total) %>%
group_by(across(-date)) %>%
summarize(date=min(date) + days(7)) %>%
group_by(fips) %>%
mutate(adj = (1/max(testpos)) * max(positive)) %>%
group_split() %>%
map(~ {
adj <-unique(.x$adj)
.x %>%
ggplot(aes(x=date))+
geom_line(aes(y=positive, color = 'Positive Tests')) +
geom_point(aes(y=positive, color = 'Positive Tests'),
alpha=.5,
size=.8) +
geom_line(aes(y=testpos*adj, color='Test Positivity'),
linetype=2) +
geom_point(aes(y=testpos*adj, color='Test Positivity'),
alpha=.5,
size=.8) +
#scale_linetype_discrete(breaks = c(1,1,2)) +
scale_color_manual(values=c(
"Positive Tests"= "#279143",
"Test Positivity"= '#E38E4F')) +
guides(color = guide_legend(override.aes = list(linetype = c(1,2),
linewidth=c(2,2)))) +
theme_c() +
theme(axis.title = element_text(size=10),
axis.text =element_text(size=9),
legend.position="none") +
scale_y_continuous(labels=comma, sec.axis = sec_axis(~./adj , name = 'Test Positivity')) +
labs(color ='',
y="Positive Tests",
x= "Date") +
facet_wrap(~name)
})
legend <- get_legend(
pltlist[[1]] +
theme(legend.position = "top",
legend.text=element_text(size=16))
)
title_gg <- ggplot() +
labs(title = "Comparing Trends in Positive Tests and Test Positivity") +
theme(plot.title=element_text(face="bold",
hjust = .5,
size = 18,
margin =margin(5,0,2,0)))
plt <- cowplot::plot_grid(plotlist=pltlist, ncol=3)
plot_grid(title_gg, legend, plt, rel_heights=c(.03, .03,.94), ncol=1)Total Tests and Positive Tests
#####################################
# TOTAL TESTS AND POSITIVE TESTS
#####################################
pltlist <- county %>%filter(version=="v7") %>%
mutate(testpos = positive/total) %>%
group_by(across(-date)) %>%
summarize(date=min(date) + days(7)) %>%
group_by(fips) %>%
mutate(adj = (1/max(total)) * max(positive)) %>%
group_split() %>%
map(~ {
adj <-unique(.x$adj)
.x %>%
ggplot(aes(x=date))+
geom_line(aes(y=positive, color = 'Positive Tests')) +
geom_point(aes(y=positive, color = 'Positive Tests'),
alpha=.5,
size=.8) +
geom_line(aes(y=total*adj, color='Total Tests'),
linetype=2) +
geom_point(aes(y=total*adj, color='Total Tests'),
alpha=.5,
size=.8) +
#scale_linetype_discrete(breaks = c(1,1,2)) +
scale_color_manual(values=c(
"Positive Tests"= "#279143",
"Total Tests"= '#E38E4F')) +
guides(color = guide_legend(override.aes = list(linetype = c(1,2)))) +
theme_c() +
theme(axis.title = element_text(size=11),
axis.text =element_text(size=8),
legend.position="none") +
scale_y_continuous(labels=label_scientific(),
n.breaks=4,
sec.axis = sec_axis(~./adj ,
name = 'Total Tests',
labels=label_scientific())) +
labs(color ='',
y="Positive Tests",
x= "Date") +
facet_wrap(~name)
})
legend <- cowplot::get_legend(
pltlist[[1]] +
theme(legend.position="top") +
guides(color = guide_legend(keywidth=2,
override.aes = list(linewidth=c(2,2),
linetype =c(1,2))))
)
title_gg <- ggplot() +
labs(title = "Comparing Trends in Total Tests and Positive Tests") +
theme(plot.title=element_text(face="bold",
hjust = .5,
size = 18,
margin =margin(5,0,2,0)))
plt <- cowplot::plot_grid(plotlist=pltlist, ncol=3)
cowplot::plot_grid(title_gg, legend, plt, rel_heights =c(.03, .03,.94),ncol=1) Percent Change in Total Tests and Positive Tests
###################################################
# PERCENT CHANGE: TOTAL TESTS AND POSITIVE TESTS
###################################################
county %>%
filter(version=="v7") %>%
filter(!grepl("Nantucket",name)) %>%
mutate(testpos = positive/total) %>%
group_by(across(-date)) %>%
summarize(date=min(date) + days(7)) %>%
group_by(fips) %>%
arrange(biweek) %>%
mutate(pct_change_positive = (positive - lag(positive, 1))/lag(positive, 1),
pct_change_total = (total - lag(total, 1))/lag(total, 1)) %>%
ggplot(aes(x=date))+
geom_line(aes(y=pct_change_positive, color = 'Positive Tests')) +
geom_point(aes(y=pct_change_positive, color = 'Positive Tests'),
alpha=.5,
size=.8) +
geom_line(aes(y=pct_change_total, color='Total Tests'),
linetype=2) +
geom_point(aes(y=pct_change_total, color='Total Tests'),
alpha=.5,
size=.8) +
#scale_linetype_discrete(breaks = c(1,1,2)) +
scale_color_manual(values=c(
"Positive Tests"= "#279143",
"Total Tests"= '#E38E4F')) +
guides(color = guide_legend(override.aes = list(linetype = c(1,2)))) +
theme_c() +
theme(axis.title = element_text(size=15),
axis.text.x=element_text(size=9),
legend.position="top") +
# scale_y_continuous(labels=comma, sec.axis = sec_axis(~./adj , name = 'Test Positivity')) +
labs(color ='',
y="Percent Change",
x= "Date",
title = "Percent Change in Positive Tests and Total Tests") +
facet_wrap(~name, ncol=3, scales="free_x") +
scale_y_continuous(limits = c(-1,5.5),
labels=percent) +
scale_x_date(date_breaks="3 months", date_labels="%b %Y")Concordance with Covidestim
wjoined <- wjoined %>%
mutate(name = gsub("County, MA","", name))
ma_concordance <- wjoined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", name )) %>%
select(-date) %>%
distinct() %>%
select(exp_cases_lb, exp_cases_ub,
name, biweek, vlabel, infections) %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections &
infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(in_interval, name, vlabel) %>%
summarize(n_per_county=n()) %>%
group_by(vlabel, in_interval) %>%
summarize(n_per_version = sum(n_per_county)) %>%
group_by(vlabel) %>%
mutate(total = sum(n_per_version)) %>%
ungroup() %>%
mutate(prop=n_per_version/total)
ma_concordance_by_county <- wjoined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", name )) %>%
select(-date) %>%
mutate(name = gsub("$", "",
name, fixed=TRUE)) %>%
select(exp_cases_lb, exp_cases_ub,
name, biweek, vlabel, infections) %>%
distinct() %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections &
infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(in_interval, name, vlabel) %>%
summarize(n_per_county=n()) %>%
group_by(name,vlabel) %>%
mutate(total = sum(n_per_county),
prop = n_per_county/total) %>%
mutate(state="MA") %>%
ungroup()Concordance by County
##############################################
# stacked bar chart of concordance by county
##############################################
ma_concordance_by_county %>%
filter(vlabel=="$Pr(S_1|untested)$ Centered at Survey Value") %>%
group_by(name) %>%
mutate(m = prop[which(in_interval=="In Interval")]) %>%
ungroup() %>%
mutate(in_interval = factor(in_interval, levels = c("Above Interval",
"In Interval", "Below Interval"))) %>%
ggplot(aes(x= fct_reorder(name,m),
fill = (in_interval), y =prop)) +
geom_bar(stat="identity",position="stack") +
theme_c() +
coord_flip()+
scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Massachusetts: Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
subtitle = TeX("For Implementation with $Pr(S_1|untested)$ Centered at Survey Value")) +
theme_c() +
theme(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1))Concordance by Version
#
in_interval <- ma_concordance %>%
filter(in_interval == "In Interval") %>%
mutate(n_interval = n_per_version) %>%
select(vlabel, n_interval)
labels <- ma_concordance %>%
filter(in_interval=="In Interval") %>%
arrange((prop)) %>%
pull(vlabel) %>%
unique() %>%
as.character()
#########################################################
# CONCORDANCE BY VERSION
#########################################################
ma_concordance %>%
left_join(in_interval) %>%
mutate(in_interval=factor(in_interval,
levels=c("Above Interval",
"In Interval",
"Below Interval"))) %>%
ggplot(aes(y =fct_reorder(vlabel,n_interval),
x = prop,
fill=in_interval)) +
#facet_wrap(~fct_reorder((vlabel),m), ncol=1) +
geom_bar(stat="identity",position="stack", color="darkgray", linewidth=.2) +
theme_c(axis.text.y = element_text(size = 14, hjust=1),
axis.title.x = element_text(size=14),
axis.text.x=element_text(size=10),
legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt')) +
scale_y_discrete(labels=unname(TeX(labels))) +
labs(y ="",
x="Proportion Containing\nthe Covidestim Median",
fill = "Covidestim Median:",
title = "Where Covidestim Medians Fall\nRelative to Probabilistic Bias Intervals",
subtitle = "By Implementation, in Massachusetts") +
scale_fill_manual(values=c("Below Interval"="#7997D2",
"In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79"),
breaks = c("Below Interval",
"In Interval",
"Above Interval")) +
guides(fill=guide_legend(byrow=TRUE)) +
scale_x_continuous(n.breaks = 7,
expand=c(0,0),
limits=c(-.05,1))ma_concordance %>%
group_by(vlabel) %>%
mutate(total=sum(n_per_version)) %>%
filter(in_interval == "In Interval") %>%
mutate(n_interval = n_per_version) %>%
ggplot(aes(y=fct_reorder(vlabel,prop), x=prop)) +
geom_text(aes(label=round(prop,3), x= prop+.03),
angle=0) +
geom_bar(stat="identity", fill="#596281") +
scale_y_discrete(breaks= unique(ma_concordance$vlabel),
labels=function(x)TeX(x)) +
scale_x_continuous(expand=c(0,0),
limits=c(0,.9), n.breaks=7) +
theme_c(axis.text.y = element_text(size = 13, hjust=1),
axis.title.x = element_text(size=14),
axis.text.x=element_text(size=10)) +
labs(y="",
x="Proportion Containing the Covidestim Median",
title="Proportion Containing the Covidestim Median",
subtitle = "By Implementation")Faceted by Version
names(pal) <- convert_labels(labels(pal), to_newlines=TRUE)
county %>%
# only include dates where there were estimates for all implementations
filter(biweek >=6) %>%
mutate(name=gsub("Nantucket and Dukes", "Nantucket\nand Dukes", name)) %>%
mutate(vlabel=fct_reorder(vlabel,as.numeric(vlabel),.desc=TRUE)) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .7,
show.legend=FALSE) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed)),
scales="free") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
strip.text.x=element_text(size=13,color='white'),
axis.title = element_text(size=17),
plot.title = element_text(size=24, hjust = .5, face="bold",
margin=margin(2,0,2,0)),
plot.subtitle = element_text(size=20, face="italic",
margin=margin(1,0,5,0))) +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "Counties in Massachusetts") Fill by Version
version_with_max <- function(variable) {
county %>%
mutate(vlabel = convert_labels(vlabel, to_newlines=FALSE)) %>%
select(-date) %>%
distinct() %>%
# only include biweeks where all implementations have estimates
filter(biweek >=6) %>%
group_by(biweek,fips) %>%
slice_max(n=1,order_by=!!sym(variable), with_ties=FALSE) %>%
group_by(vlabel) %>%
summarize(n=n()) %>%
ungroup() %>%
mutate(total=sum(n),
prop=n/total)
}
version_with_min <- function(variable) {
county %>%
mutate(vlabel = convert_labels(vlabel, to_newlines=FALSE)) %>%
select(-date) %>%
distinct() %>%
# only include biweeks where all implementations have estimates
filter(biweek >=6) %>%
group_by(biweek, fips) %>%
slice_min(n=1,order_by=!!sym(variable), with_ties=FALSE) %>%
group_by(vlabel) %>%
summarize(n=n()) %>%
ungroup() %>%
mutate(total=sum(n),
prop=n/total)
}
#
# county %>%
# select(-date) %>%
# distinct() %>%
# # only include biweeks where all implementations have estimates
# filter(biweek >=6) %>%
# group_by(biweek,fips) %>%
# # slice_min(n=1,order_by=!!sym(variable), with_ties=FALSE) %>%
# select(exp_cases_lb, biweek, fips,version) %>%
# pivot_wider(values_from = exp_cases_lb, names_from=version ) %>%
# mutate(diff=v3-v7) %>%
# pull(diff) %>%
# mean()
version_with_max_median <- version_with_max("exp_cases_median")
version_with_min_median <- version_with_min("exp_cases_median")
version_with_max_lb <- version_with_max("exp_cases_lb")
version_with_min_lb <- version_with_min("exp_cases_lb")
version_with_max_ub <- version_with_max("exp_cases_ub")
version_with_min_ub <- version_with_min("exp_cases_ub")
#############
# MEDIAN
#############
cat("* The implementatation that most frequently had the maximum median cases across the two-week intervals was",
as.character(version_with_max_median$vlabel[which.max(version_with_max_median$prop)]),
"where the median of",
version_with_max_median$prop[which.max(version_with_max_median$prop)] *100 %>% round(3),
"percent of simulation intervals was highest.")- The implementatation that most frequently had the maximum median cases across the two-week intervals was \(Pr(S_1|untested)\) and \(\beta\) Centered at Survey Values where the median of 99.38462 percent of simulation intervals was highest.
cat("* The implementatation that most frequently had the minimum median cases across the two-week intervals was",
as.character(version_with_min_median$vlabel[which.max(version_with_min_median$prop)]),
"where the median of",
version_with_min_median$prop[which.max(version_with_min_median$prop)] *100 %>% round(3),
"percent of simulation intervals was lowest.")- The implementatation that most frequently had the minimum median cases across the two-week intervals was \(Priors\,Do\,Not\,Vary\,by\,State\,and\,Date\) where the median of 100 percent of simulation intervals was lowest.
#################
# LOWER QUANTILE
#################
cat("* The implementatation that most frequently had the maximum 2.7% percentile in infections across the two-week intervals was",
as.character(version_with_max_lb$vlabel[which.max(version_with_max_lb$prop)]),
"where the median of",
version_with_max_lb$prop[which.max(version_with_max_lb$prop)] *100 %>% round(3),
"percent of simulation intervals was highest.")- The implementatation that most frequently had the maximum 2.7% percentile in infections across the two-week intervals was \(Pr(S_1|untested)\) and \(\beta\) Centered at Survey Values where the median of 85.84615 percent of simulation intervals was highest.
cat("* The implementatation that most frequently had the minimum 97.5% percentile in infections across the two-week intervals was",
as.character(version_with_min_lb$vlabel[which.max(version_with_min_lb$prop)]),
"where the median of",
version_with_min_lb$prop[which.max(version_with_min_lb$prop)] *100 %>% round(3),
"percent of simulation intervals was lowest.")- The implementatation that most frequently had the minimum 97.5% percentile in infections across the two-week intervals was \(Priors\,Do\,Not\,Vary\,by\,State\,and\,Date\) where the median of 60 percent of simulation intervals was lowest.
# switch labels to version without line breaks for clearer legend
names(pal) <- convert_labels(labels(pal), to_newlines=FALSE)
county_relab <- county %>%
mutate(vlabel=case_when(
version == "v7" ~ "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
version == "v5" ~ "$\\beta$ Centered at Survey Value",
version =="v6" ~ "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
version == "v3" ~ "$Pr(S_1|untested)$ Centered at Survey Value",
)) %>%
mutate(vlabel =factor(vlabel,levels= c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$Pr(S_1|untested)$ Centered at Survey Value",
"$\\beta$ Centered at Survey Value",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values"))) %>%
mutate(vlabel=fct_reorder(vlabel,as.numeric(vlabel),.desc=TRUE)) %>%
filter(biweek>=6)
county_relab$vlabel %>% unique()## [1] $Pr(S_1|untested)$ Centered at Survey Value
## [2] $\\beta$ Centered at Survey Value
## [3] $Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values
## [4] $Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$
## 4 Levels: $Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ...
county_relab %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill=vlabel),
alpha = .7) +
facet_wrap(~name,
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
legend.text=element_text(hjust=0)
) +
scale_fill_manual(values = pal,
labels=TeX(unique(as.character(county_relab$vlabel)))) +
labs(x="",
fill="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "Counties in Massachusetts") +
guides(fill=guide_legend(nrow=2))# only two implementations included
versions_to_include <- c(version_with_max_median$vlabel[which.max(version_with_max_median$prop)],
version_with_min_median$vlabel[which.max(version_with_min_median$prop)]) %>%
unlist() %>% as.character()
versions_to_include## [1] "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values"
## [2] "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$"
# switch labels to version without line breaks for clearer legend
# names(pal) <- convert_labels(labels(pal), to_newlines=FALSE)
names(pal)## [1] "$Pr(S_1|untested)$ Centered at Survey Value"
## [2] "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values"
## [3] "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$"
## [4] "$\\beta$ Centered at Survey Value"
county_relab <- county %>%
mutate(vlabel=convert_labels(vlabel, to_newlines=FALSE)) %>%
filter(vlabel %in% versions_to_include) %>%
mutate(vlabel=case_when(
version == "v7" ~ "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
version == "v5" ~ "$\\beta$ Centered at Survey Value",
version =="v6" ~ "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
version == "v3" ~ "$Pr(S_1|untested)$ Centered at Survey Value",
)) %>%
mutate(vlabel =factor(vlabel,levels= c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$Pr(S_1|untested)$ Centered at Survey Value",
"$\\beta$ Centered at Survey Value",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values"))) %>%
mutate(vlabel=fct_reorder(vlabel,as.numeric(vlabel),.desc=TRUE)) %>%
filter(biweek>=6)
county_relab$vlabel %>% unique()## [1] $Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values
## [2] $Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$
## 4 Levels: $Pr(S_1|untested)$ Centered at Survey Value ...
county_relab %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill=vlabel),
alpha = .7) +
facet_wrap(~name,
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
legend.text=element_text(hjust=0)
) +
scale_fill_manual(values = pal,
labels=TeX(unique(as.character(county_relab$vlabel)))) +
labs(x="",
fill="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "Counties in Massachusetts") +
guides(fill=guide_legend(nrow=2))Heatmap
max_ratio_date <- county %>%
filter(version=="v7") %>%
group_by(biweek) %>%
summarize(m = mean(exp_cases_median/positive),
dates=paste(range(date),collapse=" to ")) %>%
slice_max(n=1, m)
cat("Maximum ratio, on average, occured on interval: ",
max_ratio_date$dates, "on biweek",
max_ratio_date$biweek)## Maximum ratio, on average, occured on interval: 2021-07-02 to 2021-07-15 on biweek 14
######################################################
# HEATMAP OF RATIO OF ESTIMATED TO OBSERVED
######################################################
heatmap_dat <- county %>%
filter(version == "v7") %>%
group_by(biweek, name, exp_cases_median, positive) %>%
summarize(date = min(date)) %>%
ungroup() %>%
mutate(ratio=exp_cases_median/positive) %>%
mutate(date_lab = format(as.POSIXct(date),format="%b %d %Y")) %>%
group_by(name) %>%
mutate(m = median(ratio)) %>%
ungroup()
date_labels_all <- unique(heatmap_dat$date_lab)
date_labels <- date_labels_all
date_labels[seq(1, length(date_labels_all),by=2)] <- ""
#date_labels <- date_labels_all[seq(1, length(date_labels_all),by=2)]
heatmap_dat %>%
ggplot(aes(x=fct_reorder(date_lab, date),
y = fct_reorder(name,m),
fill =exp_cases_median/positive)) +
geom_tile() +
# scale_x_discrete(breaks=breaks_date, labels = breaks_date) +
viridis::scale_fill_viridis(option="rocket", direction=-1) +
labs(x="",
y= "",
fill = "Ratio of Estimated Infections\nto Observed Infections",
title = "Ratio of Median Estimated Infections to Observed Infections\nfor Counties in Massachusetts") +
theme_c() +
theme(axis.text.x = element_text(size=13, angle=90),
axis.text.y = element_text(size = 17),
axis.title = element_text(size=16),
legend.title = element_text(face="bold", size=18),
plot.title =element_text(size=22),
plot.subtitle = element_text(size=20)) +
scale_x_discrete(breaks = date_labels_all, labels=date_labels)# which biweeks are highest?
heatmap_dat %>%
mutate(ratio=exp_cases_median/positive) %>%
group_by(biweek) %>%
summarize(m=median(ratio),
date=min(date)) %>%
arrange(desc(m))Testing Rate versus Ratio of Estimated to Observed
lb <- county %>% mutate(testrate = total/population ) %>% pull(testrate) %>% min()
###########################################################################
# plot relationship between ratio of estimated cases to observed
# against testing rate
###########################################################################
county %>%
ggplot(aes(x=total/population, y = exp_cases_median/positive)) +
geom_point(alpha=.03, size=.9) +
facet_wrap(~vlabel,
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
theme_c(axis.text.x = element_text(size =9),
axis.title = element_text(size=16) ) +
theme(strip.text.x = element_text(size=11, color="white")) +
scale_y_continuous(n.breaks=10) +
geom_hline(yintercept=1, linetype=2,color="darkred", alpha=.8, linewidth=.5) +
labs(y = "Ratio of Estimated Infections\nto Observed Infections",
x = "Testing Rate",
subtitle = "All Time Intervals for Counties in Massachusetts",
title = "Relationship Between Testing Rate and\nRatio of Estimated Infections to Observed Infections")+
scale_x_continuous(limits=c(lb,.3), n.breaks=6)The ratio of the median estimated infections to observed infections plotted against the testing rate, where the testing rate is calculated as the total number tested in a two-week interval over the population size. When the priors are the same for all time intervals and states, there is minimal variability relationship between the testing rate and the ratio of estimated to observed infections, since the correction for incomplete testing and diagnostic test inaccuracy is identical for each time-interval and location. However, when we allow \(\beta\) or \(\Pr(S_1|\text{untested})\) to vary by state and time interval, there is much variability in the relationship. A horizontal line in red at 1 is included to reference; a ratio of exactly one would indicate no infections went unobserved.
###########################################################################
# plot relationship between ratio of estimated cases to observed
# against testing rate -- colored by county
###########################################################################
county %>%
ggplot(aes(x=total/population, y = exp_cases_median/positive, color=name)) +
geom_point(alpha=.03, size=.9) +
geom_linerange(aes(ymin = exp_cases_lb/positive,
ymax=exp_cases_ub/positive ),
alpha=.03) +
facet_wrap(~vlabel,
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
theme_c(axis.text.x = element_text(size =9),
axis.title = element_text(size=16) ) +
theme(strip.text.x = element_text(size=11, color="white")) +
scale_y_continuous(n.breaks=10) +
geom_hline(yintercept=1, linetype=2,color="darkred", alpha=.8, linewidth=.5) +
labs(y = "Ratio of Estimated Infections to Observed Infections",
x = "Testing Rate",
subtitle = "All Time Intervals and All States",
title = "Relationship Between Testing Rate and\nRatio of Estimated Infections to Observed Infections")+
scale_x_continuous( n.breaks=4, limits=c(lb,.3))###########################################################################
# plot relationship between ratio of estimated cases to observed
# against testing rate -- faceted by county
###########################################################################
county %>%
ggplot(aes(x=total/population, y = exp_cases_median/positive)) +
geom_point(alpha=.03, size=.9) +
geom_linerange(aes(ymin = exp_cases_lb/positive,
ymax=exp_cases_ub/positive ),
alpha=.03) +
facet_grid(name~vlabel,
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
theme_c(axis.text.x = element_text(size =9),
axis.title = element_text(size=16) ) +
theme(strip.text.x = element_text(size=11, color="white")) +
scale_y_continuous(n.breaks=10) +
geom_hline(yintercept=1, linetype=2,color="darkred", alpha=.8, linewidth=.5) +
labs(y = "Ratio of Estimated Infections to Observed Infections",
x = "Testing Rate",
subtitle = "All Time Intervals and All States",
title = "Relationship Between Testing Rate and\nRatio of Estimated Infections to Observed Infections")+
scale_x_continuous( n.breaks=4, limits=c(lb-.02,.3))Table Showing Covidestim Coverage
versions <- tibble(
version = c( "v3", "v5", "v6", "v7"),
vlabel = factor(
c( "$Pr(S_1|$untested) Centered at Survey Value" ,
"$\\beta$ Centered at Survey Value",
"$Pr(S_1|$untested) and $\\beta$ Centered at Survey Values",
"Priors Do Not Vary by State or Date"
),
levels = c( "$Pr(S_1|$untested) Centered at Survey Value" ,
"$\\beta$ Centered at Survey Value",
"$Pr(S_1|$untested) and $\\beta$ Centered at Survey Values",
"Priors Do Not Vary by State or Date"
))
)state <- tar_read(state_v1,
store =targets_dir) %>%
bind_rows(
tar_read(state_v2,
store =targets_dir)
) %>%
bind_rows(
tar_read(state_v3,
store =targets_dir)
) %>%
bind_rows(
tar_read(state_v4,
store =targets_dir)
)%>%
bind_rows(
tar_read(state_v5,
store =targets_dir)
)%>%
bind_rows(
tar_read(state_v6,
store =targets_dir)
) %>%
bind_rows(
tar_read(state_v7,
store =targets_dir)
)
covidestim_state <- tar_read(covidestim_biweekly_state,
store =targets_dir)
state_covidestim <- state %>%
inner_join(covidestim_state, relationship="many-to-many",
by = c('fips'='state', 'biweek'='biweek')) %>%
distinct() %>%
filter(version %in% FILT_VERSIONS) county_covidestim <- county %>%
inner_join(covidestim, relationship = "many-to-many") %>%
select(-date) %>%
distinct()
get_coverage <- function(df, geo_scale) {
df %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections &
infections <= exp_cases_ub ~ "Contained in Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(version, in_interval) %>%
summarize(n=n()) %>%
group_by(version) %>%
mutate(total = sum(n)) %>%
ungroup() %>%
mutate(prop = n/total) %>%
select(version, prop, in_interval) %>%
pivot_wider(names_from = in_interval, values_from=prop, names_prefix="Percent ") %>%
mutate(across(contains("Percent"), ~.x*100 )) %>%
# mutate(across(contains("Percent"), ~paste0(round(.x,3),"%")))
left_join(versions) %>%
mutate(`Geographic Scale` = geo_scale) %>%
select( `Geographic Scale`,
Implementation = vlabel,
`Percent Below Interval`,
`Percent Contained in Interval`,
`Percent Above Interval`)
}
county_coverage <- county_covidestim %>% get_coverage(geo_scale="County Level (in Massachusetts)")
saveRDS(county_coverage, here("figures/table_data/county_coverage.RDS"))
state_coverage <- state_covidestim %>% get_coverage(geo_scale="State Level")
saveRDS(state_coverage, here("figures/table_data/state_coverage.RDS"))county_coverage %>%
bind_rows(state_coverage) %>%
group_by(`Geographic Scale`) %>%
arrange(desc(`Percent Contained in Interval`),
.by_group=TRUE) %>%
ungroup() %>%
select(-`Geographic Scale`) %>%
kbl(caption = "\\label{table:coverage}Coverage of Covidestim Medians",
booktabs=TRUE,
escape=FALSE,
digits=3) %>%
kable_styling() %>%
group_rows(start_row=1, end_row =4, group_label= "County") %>%
group_rows(start_row=5, end_row =8, group_label= "State") | Implementation | Percent Below Interval | Percent Contained in Interval | Percent Above Interval |
|---|---|---|---|
| County | |||
| \(Pr(S_1|\)untested) Centered at Survey Value | 1.282 | 87.500 | 11.218 |
| Priors Do Not Vary by State or Date | 1.075 | 74.194 | 24.731 |
| \(\beta\) Centered at Survey Value | 29.487 | 63.782 | 6.731 |
| \(Pr(S_1|\)untested) and \(\beta\) Centered at Survey Values | 34.615 | 62.179 | 3.205 |
| State | |||
| \(Pr(S_1|\)untested) Centered at Survey Value | 0.889 | 79.111 | 20.000 |
| Priors Do Not Vary by State or Date | 5.882 | 71.569 | 22.549 |
| \(\beta\) Centered at Survey Value | 16.296 | 70.963 | 12.741 |
| \(Pr(S_1|\)untested) and \(\beta\) Centered at Survey Values | 22.222 | 66.815 | 10.963 |
county_coverage <- county_covidestim %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections &
infections <= exp_cases_ub ~ "Contained in Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(version, in_interval) %>%
summarize(n=n()) %>%
group_by(version) %>%
mutate(total = sum(n)) %>%
ungroup() %>%
mutate(prop = n/total) %>%
select(version, prop, in_interval) %>%
pivot_wider(names_from = in_interval, values_from=prop, names_prefix="Percent ") %>%
mutate(across(contains("Percent"), ~.x*100 )) %>%
# mutate(across(contains("Percent"), ~paste0(round(.x,3),"%")))
left_join(versions) %>%
select( # `Geographic Scale`,
Implementation = vlabel,
`Percent Above Interval`,
`Percent Contained in Interval`,
`Percent Below Interval`)